Load the diamonds.csv data set and undertake an initial exploration of the data. You will find a description of the meanings of the variables on the relevant Kaggle page

library(readr)
diamonds <- read_csv("data/diamonds.csv")
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
  X1 = col_double(),
  carat = col_double(),
  cut = col_character(),
  color = col_character(),
  clarity = col_character(),
  depth = col_double(),
  table = col_double(),
  price = col_double(),
  x = col_double(),
  y = col_double(),
  z = col_double()
)
library(GGally)
Loading required package: ggplot2
Registered S3 method overwritten by 'dplyr':
  method           from
  print.rowwise_df     

Attaching package: ‘ggplot2’

The following object is masked _by_ ‘.GlobalEnv’:

    diamonds

Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
ggpairs(diamonds)

library(tidyverse)
Registered S3 method overwritten by 'dplyr':
  method           from
  print.rowwise_df     
── Attaching packages ───────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.2.0     ✔ purrr   0.3.2
✔ tibble  2.1.3     ✔ dplyr   0.8.2
✔ tidyr   0.8.3     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0
── Conflicts ──────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
glimpse(diamonds)
Observations: 53,940
Variables: 10
$ carat   <dbl> 0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, 0.30, 0…
$ cut     <ord> Ideal, Premium, Good, Premium, Good, Very Good, Very Good, Very Goo…
$ color   <ord> E, E, E, I, J, J, I, H, E, H, J, J, F, J, E, E, I, J, J, J, I, E, H…
$ clarity <ord> SI2, SI1, VS1, VS2, SI2, VVS2, VVS1, SI1, VS2, VS1, SI1, VS1, SI1, …
$ depth   <dbl> 61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, 64.0, 6…
$ table   <dbl> 55, 61, 65, 58, 58, 57, 57, 55, 61, 61, 55, 56, 61, 54, 62, 58, 54,…
$ price   <int> 326, 326, 327, 334, 335, 336, 336, 337, 337, 338, 339, 340, 342, 34…
$ x       <dbl> 3.95, 3.89, 4.05, 4.20, 4.34, 3.94, 3.95, 4.07, 3.87, 4.00, 4.25, 3…
$ y       <dbl> 3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, 4.28, 3…
$ z       <dbl> 2.43, 2.31, 2.31, 2.63, 2.75, 2.48, 2.47, 2.53, 2.49, 2.39, 2.73, 2…
summary(diamonds)
       X1            carat            cut               color          
 Min.   :    1   Min.   :0.2000   Length:53940       Length:53940      
 1st Qu.:13486   1st Qu.:0.4000   Class :character   Class :character  
 Median :26970   Median :0.7000   Mode  :character   Mode  :character  
 Mean   :26970   Mean   :0.7979                                        
 3rd Qu.:40455   3rd Qu.:1.0400                                        
 Max.   :53940   Max.   :5.0100                                        
   clarity              depth           table           price             x         
 Length:53940       Min.   :43.00   Min.   :43.00   Min.   :  326   Min.   : 0.000  
 Class :character   1st Qu.:61.00   1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710  
 Mode  :character   Median :61.80   Median :57.00   Median : 2401   Median : 5.700  
                    Mean   :61.75   Mean   :57.46   Mean   : 3933   Mean   : 5.731  
                    3rd Qu.:62.50   3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540  
                    Max.   :79.00   Max.   :95.00   Max.   :18823   Max.   :10.740  
       y                z         
 Min.   : 0.000   Min.   : 0.000  
 1st Qu.: 4.720   1st Qu.: 2.910  
 Median : 5.710   Median : 3.530  
 Mean   : 5.735   Mean   : 3.539  
 3rd Qu.: 6.540   3rd Qu.: 4.040  
 Max.   :58.900   Max.   :31.800  
library(psych)
pairs.panels(diamonds[c("X1", "carat", "cut", "color", "clarity", "depth", "table", "price")])

pairs.panels(diamonds[c("X1", "carat", "x", "y", "z", "price")])

We expect the carat of the diamonds to be strong correlated with the physical dimensions x, y and z. Use ggpairs() to investigate correlations between these four variables.

pairs.panels(diamonds[c("carat", "x", "y", "z")])

So, we do find significant correlations. Let’s drop columns x, y and z from the dataset, in preparation to use only carat going forward.

diamonds_trim <- diamonds %>%
  select(-c("x", "y", "z"))
diamonds_trim

We are interested in developing a regression model for the price of a diamond in terms of the possible predictor variables in the dataset.

Use ggpairs() to investigate correlations between price and the predictors (this may take a while to run, don’t worry, make coffee or something).

Perform further ggplot visualisations of any significant correlations you find.

ggpairs(diamonds_trim)

pairs.panels(diamonds[c("carat", "cut", "clarity", "color", "price")])

diamonds_trim %>%
  ggplot(aes(x = clarity, y = price)) +
  geom_boxplot()

diamonds_trim %>%
  ggplot(aes(x = cut, y = price)) +
  geom_boxplot()

diamonds_trim %>%
  ggplot(aes(x = color, y = price)) +
  geom_boxplot()

Shortly we may try a regression fit using one or more of the categorical predictors cut, clarity and color, so let’s investigate these predictors:

Investigate the factor levels of these predictors. How many dummy variables do you expect for each of them?

Use the dummy_cols() function in the fastDummies package to generate dummies for these predictors and check the number of dummies in each case.

clarity = 8 cut = 5 color = 7

library(fastDummies)
diamonds_trim <- dummy_cols(diamonds_trim, 
                            select_columns = c("clarity"), 
                            remove_first_dummy = TRUE)

diamonds_trim <- subset(diamonds_trim, select = -c("clarity"))
Error in -c("clarity") : invalid argument to unary operator
diamonds_trim <- dummy_cols(diamonds_trim, 
                            select_columns = c("cut"), 
                            remove_first_dummy = TRUE)
diamonds_trim <- dummy_cols(diamonds_trim, 
                            select_columns = c("color"), 
                            remove_first_dummy = TRUE)
diamonds_trim

Going forward we’ll let R handle dummy variable creation for categorical predictors in regression fitting (remember lm() will generate the correct numbers of dummy levels automatically, absorbing one of the levels into the intercept as a reference level)

First, we’ll start with simple linear regression. Regress price on carat and check the regression diagnostics.

Run a regression with one or both of the predictor and response variables log() transformed and recheck the diagnostics. Do you see any improvement?

Let’s use log() transformations of both predictor and response. Next, experiment with adding a single categorical predictor into the model. Which categorical predictor is best? [Hint - investigate r2 values]

Interpret the fitted coefficients for the levels of your chosen categorical predictor. Which level is the reference level? Which level shows the greatest difference in price from the reference level? [Hints - remember we are regressing the log(price) here, and think about what the presence of the log(carat) predictor implies. We’re not expecting a mathematical explanation]

lin_model <- lm(price ~ carat, data = diamonds_trim)

plot(lin_model)

summary(lin_model)

Call:
lm(formula = price ~ carat, data = diamonds_trim)

Residuals:
     Min       1Q   Median       3Q      Max 
-18585.3   -804.8    -18.9    537.4  12731.7 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -2256.36      13.06  -172.8   <2e-16 ***
carat        7756.43      14.07   551.4   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1549 on 53938 degrees of freedom
Multiple R-squared:  0.8493,    Adjusted R-squared:  0.8493 
F-statistic: 3.041e+05 on 1 and 53938 DF,  p-value: < 2.2e-16
lin_model_log_price <- lm(log(price) ~ carat, data = diamonds_trim)

plot(lin_model_log_price)

summary(lin_model_log_price)

Call:
lm(formula = log(price) ~ carat, data = diamonds_trim)

Residuals:
    Min      1Q  Median      3Q     Max 
-6.2844 -0.2449  0.0335  0.2578  1.5642 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 6.215021   0.003348    1856   <2e-16 ***
carat       1.969757   0.003608     546   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3972 on 53938 degrees of freedom
Multiple R-squared:  0.8468,    Adjusted R-squared:  0.8468 
F-statistic: 2.981e+05 on 1 and 53938 DF,  p-value: < 2.2e-16
lin_model_log_price_carat <- lm(log(price) ~ log(carat), data = diamonds_trim)

plot(lin_model_log_price_carat)

summary(lin_model_log_price_carat)

Call:
lm(formula = log(price) ~ log(carat), data = diamonds_trim)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.50833 -0.16951 -0.00591  0.16637  1.33793 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 8.448661   0.001365  6190.9   <2e-16 ***
log(carat)  1.675817   0.001934   866.6   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2627 on 53938 degrees of freedom
Multiple R-squared:  0.933, Adjusted R-squared:  0.933 
F-statistic: 7.51e+05 on 1 and 53938 DF,  p-value: < 2.2e-16
lin_model_log_price_carat_clarity <- lm(log(price) ~ log(carat) + clarity, 
                                        data = diamonds_trim)

plot(lin_model_log_price_carat_clarity)

summary(lin_model_log_price_carat_clarity)

Call:
lm(formula = log(price) ~ log(carat) + clarity, data = diamonds_trim)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.97521 -0.12085  0.01048  0.12561  1.85854 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 7.768115   0.006940 1119.25   <2e-16 ***
log(carat)  1.806424   0.001514 1193.23   <2e-16 ***
clarityIF   1.114625   0.008376  133.07   <2e-16 ***
claritySI1  0.624558   0.007163   87.19   <2e-16 ***
claritySI2  0.479658   0.007217   66.46   <2e-16 ***
clarityVS1  0.820461   0.007306  112.30   <2e-16 ***
clarityVS2  0.775248   0.007197  107.72   <2e-16 ***
clarityVVS1 1.028298   0.007745  132.77   <2e-16 ***
clarityVVS2 0.979221   0.007529  130.05   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1888 on 53931 degrees of freedom
Multiple R-squared:  0.9654,    Adjusted R-squared:  0.9654 
F-statistic: 1.879e+05 on 8 and 53931 DF,  p-value: < 2.2e-16
lin_model_log_price_carat_cut <- lm(log(price) ~ log(carat) + cut, 
                                        data = diamonds_trim)

plot(lin_model_log_price_carat_cut)

summary(lin_model_log_price_carat_cut)

Call:
lm(formula = log(price) ~ log(carat) + cut, data = diamonds_trim)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.52247 -0.16484 -0.00587  0.16087  1.38115 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  8.200125   0.006343 1292.69   <2e-16 ***
log(carat)   1.695771   0.001910  887.68   <2e-16 ***
cutGood      0.163245   0.007324   22.29   <2e-16 ***
cutIdeal     0.317212   0.006632   47.83   <2e-16 ***
cutPremium   0.238217   0.006716   35.47   <2e-16 ***
cutVery Good 0.240753   0.006779   35.52   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2545 on 53934 degrees of freedom
Multiple R-squared:  0.9371,    Adjusted R-squared:  0.9371 
F-statistic: 1.607e+05 on 5 and 53934 DF,  p-value: < 2.2e-16
lin_model_log_price_carat_color <- lm(log(price) ~ log(carat) + color, 
                                        data = diamonds_trim)

plot(lin_model_log_price_carat_color)

summary(lin_model_log_price_carat_color)

Call:
lm(formula = log(price) ~ log(carat) + color, data = diamonds_trim)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.49987 -0.14888  0.00257  0.15316  1.27815 

Coefficients:
             Estimate Std. Error  t value Pr(>|t|)    
(Intercept)  8.572034   0.003051 2809.531  < 2e-16 ***
log(carat)   1.728631   0.001814  952.727  < 2e-16 ***
colorE      -0.025460   0.003748   -6.793 1.11e-11 ***
colorF      -0.034455   0.003773   -9.132  < 2e-16 ***
colorG      -0.055399   0.003653  -15.166  < 2e-16 ***
colorH      -0.189859   0.003917  -48.468  < 2e-16 ***
colorI      -0.286928   0.004383  -65.467  < 2e-16 ***
colorJ      -0.425038   0.005417  -78.466  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2372 on 53932 degrees of freedom
Multiple R-squared:  0.9454,    Adjusted R-squared:  0.9454 
F-statistic: 1.333e+05 on 7 and 53932 DF,  p-value: < 2.2e-16

Let’s use log() transformations of both predictor and response. Next, experiment with adding a single categorical predictor into the model. Which categorical predictor is best? [Hint - investigate r2 values]

Clarity

Interpret the fitted coefficients for the levels of your chosen categorical predictor. Which level is the reference level? I1 is the reference level.

Which level shows the greatest difference in price from the reference level? [Hints - remember we are regressing the log(price) here, and think about what the presence of the log(carat) predictor implies. We’re not expecting a mathematical explanation]

clarityIF 1.114625 Shows the greatest difference in price from the reference level.

log(carat) predictor -

Extension: Could not get this to run.

library(ggiraphExtra)
model_interaction <- lm(log(price) ~ log(carat) + clarity + log(carat):clarity, data = diamonds_trim)

summary(model_interaction)

Call:
lm(formula = log(price) ~ log(carat) + clarity + log(carat):clarity, 
    data = diamonds_trim)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.92773 -0.12104  0.01212  0.12465  1.51830 

Coefficients:
                      Estimate Std. Error  t value Pr(>|t|)    
(Intercept)           8.513002   0.001789 4758.971  < 2e-16 ***
log(carat)            1.785386   0.002571  694.497  < 2e-16 ***
clarity.L             0.919153   0.006727  136.645  < 2e-16 ***
clarity.Q            -0.226280   0.006399  -35.361  < 2e-16 ***
clarity.C             0.132042   0.005690   23.206  < 2e-16 ***
clarity^4            -0.079977   0.004871  -16.420  < 2e-16 ***
clarity^5             0.048570   0.004140   11.733  < 2e-16 ***
clarity^6             0.019949   0.003482    5.729 1.02e-08 ***
clarity^7             0.047175   0.002764   17.068  < 2e-16 ***
log(carat):clarity.L  0.210322   0.010040   20.949  < 2e-16 ***
log(carat):clarity.Q -0.128191   0.009785  -13.101  < 2e-16 ***
log(carat):clarity.C  0.108289   0.008357   12.959  < 2e-16 ***
log(carat):clarity^4 -0.072731   0.006629  -10.972  < 2e-16 ***
log(carat):clarity^5  0.057509   0.005259   10.935  < 2e-16 ***
log(carat):clarity^6  0.016606   0.004350    3.817 0.000135 ***
log(carat):clarity^7 -0.017355   0.003633   -4.777 1.78e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1877 on 53924 degrees of freedom
Multiple R-squared:  0.9658,    Adjusted R-squared:  0.9658 
F-statistic: 1.014e+05 on 15 and 53924 DF,  p-value: < 2.2e-16
#ggPredict(model_interaction)
#summary(model_interaction)
LS0tCnRpdGxlOiAiSG9tZXdvcmsiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCkxvYWQgdGhlIGRpYW1vbmRzLmNzdiBkYXRhIHNldCBhbmQgdW5kZXJ0YWtlIGFuIGluaXRpYWwgZXhwbG9yYXRpb24gb2YgdGhlIGRhdGEuIFlvdSB3aWxsIGZpbmQgYSBkZXNjcmlwdGlvbiBvZiB0aGUgbWVhbmluZ3Mgb2YgdGhlIHZhcmlhYmxlcyBvbiB0aGUgcmVsZXZhbnQgS2FnZ2xlIHBhZ2UKCmBgYHtyfQpsaWJyYXJ5KHJlYWRyKQpkaWFtb25kcyA8LSByZWFkX2NzdigiZGF0YS9kaWFtb25kcy5jc3YiKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KEdHYWxseSkKZ2dwYWlycyhkaWFtb25kcykKYGBgCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKZ2xpbXBzZShkaWFtb25kcykKYGBgCgpgYGB7cn0Kc3VtbWFyeShkaWFtb25kcykKYGBgCgpgYGB7cn0KbGlicmFyeShwc3ljaCkKcGFpcnMucGFuZWxzKGRpYW1vbmRzW2MoIlgxIiwgImNhcmF0IiwgImN1dCIsICJjb2xvciIsICJjbGFyaXR5IiwgImRlcHRoIiwgInRhYmxlIiwgInByaWNlIildKQpgYGAKCmBgYHtyfQpwYWlycy5wYW5lbHMoZGlhbW9uZHNbYygiWDEiLCAiY2FyYXQiLCAieCIsICJ5IiwgInoiLCAicHJpY2UiKV0pCmBgYAoKV2UgZXhwZWN0IHRoZSBjYXJhdCBvZiB0aGUgZGlhbW9uZHMgdG8gYmUgc3Ryb25nIGNvcnJlbGF0ZWQgd2l0aCB0aGUgcGh5c2ljYWwgZGltZW5zaW9ucyB4LCB5IGFuZCB6LiBVc2UgZ2dwYWlycygpIHRvIGludmVzdGlnYXRlIGNvcnJlbGF0aW9ucyBiZXR3ZWVuIHRoZXNlIGZvdXIgdmFyaWFibGVzLgpgYGB7cn0KcGFpcnMucGFuZWxzKGRpYW1vbmRzW2MoImNhcmF0IiwgIngiLCAieSIsICJ6IildKQpgYGAKClNvLCB3ZSBkbyBmaW5kIHNpZ25pZmljYW50IGNvcnJlbGF0aW9ucy4gTGV04oCZcyBkcm9wIGNvbHVtbnMgeCwgeSBhbmQgeiBmcm9tIHRoZSBkYXRhc2V0LCBpbiBwcmVwYXJhdGlvbiB0byB1c2Ugb25seSBjYXJhdCBnb2luZyBmb3J3YXJkLgoKYGBge3J9CmRpYW1vbmRzX3RyaW0gPC0gZGlhbW9uZHMgJT4lCiAgc2VsZWN0KC1jKCJ4IiwgInkiLCAieiIpKQpgYGAKCmBgYHtyfQpkaWFtb25kc190cmltCmBgYAoKV2UgYXJlIGludGVyZXN0ZWQgaW4gZGV2ZWxvcGluZyBhIHJlZ3Jlc3Npb24gbW9kZWwgZm9yIHRoZSBwcmljZSBvZiBhIGRpYW1vbmQgaW4gdGVybXMgb2YgdGhlIHBvc3NpYmxlIHByZWRpY3RvciB2YXJpYWJsZXMgaW4gdGhlIGRhdGFzZXQuCgpVc2UgZ2dwYWlycygpIHRvIGludmVzdGlnYXRlIGNvcnJlbGF0aW9ucyBiZXR3ZWVuIHByaWNlIGFuZCB0aGUgcHJlZGljdG9ycyAodGhpcyBtYXkgdGFrZSBhIHdoaWxlIHRvIHJ1biwgZG9u4oCZdCB3b3JyeSwgbWFrZSBjb2ZmZWUgb3Igc29tZXRoaW5nKS4KClBlcmZvcm0gZnVydGhlciBnZ3Bsb3QgdmlzdWFsaXNhdGlvbnMgb2YgYW55IHNpZ25pZmljYW50IGNvcnJlbGF0aW9ucyB5b3UgZmluZC4KCmBgYHtyfQpnZ3BhaXJzKGRpYW1vbmRzX3RyaW0pCmBgYAoKYGBge3J9CnBhaXJzLnBhbmVscyhkaWFtb25kc1tjKCJjYXJhdCIsICJjdXQiLCAiY2xhcml0eSIsICJjb2xvciIsICJwcmljZSIpXSkKYGBgCgpgYGB7cn0KZGlhbW9uZHNfdHJpbSAlPiUKICBnZ3Bsb3QoYWVzKHggPSBjbGFyaXR5LCB5ID0gcHJpY2UpKSArCiAgZ2VvbV9ib3hwbG90KCkKYGBgCgpgYGB7cn0KZGlhbW9uZHNfdHJpbSAlPiUKICBnZ3Bsb3QoYWVzKHggPSBjdXQsIHkgPSBwcmljZSkpICsKICBnZW9tX2JveHBsb3QoKQpgYGAKCmBgYHtyfQpkaWFtb25kc190cmltICU+JQogIGdncGxvdChhZXMoeCA9IGNvbG9yLCB5ID0gcHJpY2UpKSArCiAgZ2VvbV9ib3hwbG90KCkKYGBgClNob3J0bHkgd2UgbWF5IHRyeSBhIHJlZ3Jlc3Npb24gZml0IHVzaW5nIG9uZSBvciBtb3JlIG9mIHRoZSBjYXRlZ29yaWNhbCBwcmVkaWN0b3JzIGN1dCwgY2xhcml0eSBhbmQgY29sb3IsIHNvIGxldOKAmXMgaW52ZXN0aWdhdGUgdGhlc2UgcHJlZGljdG9yczoKCkludmVzdGlnYXRlIHRoZSBmYWN0b3IgbGV2ZWxzIG9mIHRoZXNlIHByZWRpY3RvcnMuIEhvdyBtYW55IGR1bW15IHZhcmlhYmxlcyBkbyB5b3UgZXhwZWN0IGZvciBlYWNoIG9mIHRoZW0/CgpVc2UgdGhlIGR1bW15X2NvbHMoKSBmdW5jdGlvbiBpbiB0aGUgZmFzdER1bW1pZXMgcGFja2FnZSB0byBnZW5lcmF0ZSBkdW1taWVzIGZvciB0aGVzZSBwcmVkaWN0b3JzIGFuZCBjaGVjayB0aGUgbnVtYmVyIG9mIGR1bW1pZXMgaW4gZWFjaCBjYXNlLgoKY2xhcml0eSA9IDgKY3V0ID0gNQpjb2xvciA9IDcKCmBgYHtyfQpsaWJyYXJ5KGZhc3REdW1taWVzKQpgYGAKCmBgYHtyfQpkaWFtb25kc190cmltIDwtIGR1bW15X2NvbHMoZGlhbW9uZHNfdHJpbSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBzZWxlY3RfY29sdW1ucyA9IGMoImNsYXJpdHkiKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICByZW1vdmVfZmlyc3RfZHVtbXkgPSBUUlVFKQoKYGBgCgoKYGBge3J9CmRpYW1vbmRzX3RyaW0gPC0gZHVtbXlfY29scyhkaWFtb25kc190cmltLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHNlbGVjdF9jb2x1bW5zID0gYygiY3V0IiksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVtb3ZlX2ZpcnN0X2R1bW15ID0gVFJVRSkKYGBgCgoKYGBge3J9CmRpYW1vbmRzX3RyaW0gPC0gZHVtbXlfY29scyhkaWFtb25kc190cmltLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHNlbGVjdF9jb2x1bW5zID0gYygiY29sb3IiKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICByZW1vdmVfZmlyc3RfZHVtbXkgPSBUUlVFKQpgYGAKCmBgYHtyfQpkaWFtb25kc190cmltCmBgYApHb2luZyBmb3J3YXJkIHdl4oCZbGwgbGV0IFIgaGFuZGxlIGR1bW15IHZhcmlhYmxlIGNyZWF0aW9uIGZvciBjYXRlZ29yaWNhbCBwcmVkaWN0b3JzIGluIHJlZ3Jlc3Npb24gZml0dGluZyAocmVtZW1iZXIgbG0oKSB3aWxsIGdlbmVyYXRlIHRoZSBjb3JyZWN0IG51bWJlcnMgb2YgZHVtbXkgbGV2ZWxzIGF1dG9tYXRpY2FsbHksIGFic29yYmluZyBvbmUgb2YgdGhlIGxldmVscyBpbnRvIHRoZSBpbnRlcmNlcHQgYXMgYSByZWZlcmVuY2UgbGV2ZWwpCgpGaXJzdCwgd2XigJlsbCBzdGFydCB3aXRoIHNpbXBsZSBsaW5lYXIgcmVncmVzc2lvbi4gUmVncmVzcyBwcmljZSBvbiBjYXJhdCBhbmQgY2hlY2sgdGhlIHJlZ3Jlc3Npb24gZGlhZ25vc3RpY3MuCgpSdW4gYSByZWdyZXNzaW9uIHdpdGggb25lIG9yIGJvdGggb2YgdGhlIHByZWRpY3RvciBhbmQgcmVzcG9uc2UgdmFyaWFibGVzIGxvZygpIHRyYW5zZm9ybWVkIGFuZCByZWNoZWNrIHRoZSBkaWFnbm9zdGljcy4gRG8geW91IHNlZSBhbnkgaW1wcm92ZW1lbnQ/CgpMZXTigJlzIHVzZSBsb2coKSB0cmFuc2Zvcm1hdGlvbnMgb2YgYm90aCBwcmVkaWN0b3IgYW5kIHJlc3BvbnNlLiBOZXh0LCBleHBlcmltZW50IHdpdGggYWRkaW5nIGEgc2luZ2xlIGNhdGVnb3JpY2FsIHByZWRpY3RvciBpbnRvIHRoZSBtb2RlbC4gV2hpY2ggY2F0ZWdvcmljYWwgcHJlZGljdG9yIGlzIGJlc3Q/IFtIaW50IC0gaW52ZXN0aWdhdGUgcjIKIHZhbHVlc10KCkludGVycHJldCB0aGUgZml0dGVkIGNvZWZmaWNpZW50cyBmb3IgdGhlIGxldmVscyBvZiB5b3VyIGNob3NlbiBjYXRlZ29yaWNhbCBwcmVkaWN0b3IuIFdoaWNoIGxldmVsIGlzIHRoZSByZWZlcmVuY2UgbGV2ZWw/IFdoaWNoIGxldmVsIHNob3dzIHRoZSBncmVhdGVzdCBkaWZmZXJlbmNlIGluIHByaWNlIGZyb20gdGhlIHJlZmVyZW5jZSBsZXZlbD8gW0hpbnRzIC0gcmVtZW1iZXIgd2UgYXJlIHJlZ3Jlc3NpbmcgdGhlIGxvZyhwcmljZSkgaGVyZSwgYW5kIHRoaW5rIGFib3V0IHdoYXQgdGhlIHByZXNlbmNlIG9mIHRoZSBsb2coY2FyYXQpIHByZWRpY3RvciBpbXBsaWVzLiBXZeKAmXJlIG5vdCBleHBlY3RpbmcgYSBtYXRoZW1hdGljYWwgZXhwbGFuYXRpb25dCgpgYGB7cn0KbGluX21vZGVsIDwtIGxtKHByaWNlIH4gY2FyYXQsIGRhdGEgPSBkaWFtb25kc190cmltKQoKcGxvdChsaW5fbW9kZWwpCmBgYAoKYGBge3J9CnN1bW1hcnkobGluX21vZGVsKQpgYGAKCmBgYHtyfQpsaW5fbW9kZWxfbG9nX3ByaWNlIDwtIGxtKGxvZyhwcmljZSkgfiBjYXJhdCwgZGF0YSA9IGRpYW1vbmRzX3RyaW0pCgpwbG90KGxpbl9tb2RlbF9sb2dfcHJpY2UpCnN1bW1hcnkobGluX21vZGVsX2xvZ19wcmljZSkKYGBgCgpgYGB7cn0KbGluX21vZGVsX2xvZ19wcmljZV9jYXJhdCA8LSBsbShsb2cocHJpY2UpIH4gbG9nKGNhcmF0KSwgZGF0YSA9IGRpYW1vbmRzX3RyaW0pCgpwbG90KGxpbl9tb2RlbF9sb2dfcHJpY2VfY2FyYXQpCnN1bW1hcnkobGluX21vZGVsX2xvZ19wcmljZV9jYXJhdCkKYGBgCgpgYGB7cn0KbGluX21vZGVsX2xvZ19wcmljZV9jYXJhdF9jbGFyaXR5IDwtIGxtKGxvZyhwcmljZSkgfiBsb2coY2FyYXQpICsgY2xhcml0eSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhID0gZGlhbW9uZHNfdHJpbSkKCnBsb3QobGluX21vZGVsX2xvZ19wcmljZV9jYXJhdF9jbGFyaXR5KQpzdW1tYXJ5KGxpbl9tb2RlbF9sb2dfcHJpY2VfY2FyYXRfY2xhcml0eSkKYGBgCgpgYGB7cn0KbGluX21vZGVsX2xvZ19wcmljZV9jYXJhdF9jdXQgPC0gbG0obG9nKHByaWNlKSB+IGxvZyhjYXJhdCkgKyBjdXQsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZGF0YSA9IGRpYW1vbmRzX3RyaW0pCgpwbG90KGxpbl9tb2RlbF9sb2dfcHJpY2VfY2FyYXRfY3V0KQpzdW1tYXJ5KGxpbl9tb2RlbF9sb2dfcHJpY2VfY2FyYXRfY3V0KQpgYGAKCmBgYHtyfQpsaW5fbW9kZWxfbG9nX3ByaWNlX2NhcmF0X2NvbG9yIDwtIGxtKGxvZyhwcmljZSkgfiBsb2coY2FyYXQpICsgY29sb3IsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZGF0YSA9IGRpYW1vbmRzX3RyaW0pCgpwbG90KGxpbl9tb2RlbF9sb2dfcHJpY2VfY2FyYXRfY29sb3IpCnN1bW1hcnkobGluX21vZGVsX2xvZ19wcmljZV9jYXJhdF9jb2xvcikKYGBgCgpMZXTigJlzIHVzZSBsb2coKSB0cmFuc2Zvcm1hdGlvbnMgb2YgYm90aCBwcmVkaWN0b3IgYW5kIHJlc3BvbnNlLiBOZXh0LCBleHBlcmltZW50IHdpdGggYWRkaW5nIGEgc2luZ2xlIGNhdGVnb3JpY2FsIHByZWRpY3RvciBpbnRvIHRoZSBtb2RlbC4gV2hpY2ggY2F0ZWdvcmljYWwgcHJlZGljdG9yIGlzIGJlc3Q/IFtIaW50IC0gaW52ZXN0aWdhdGUgcjIKIHZhbHVlc10KCioqQ2xhcml0eSoqCgpJbnRlcnByZXQgdGhlIGZpdHRlZCBjb2VmZmljaWVudHMgZm9yIHRoZSBsZXZlbHMgb2YgeW91ciBjaG9zZW4gY2F0ZWdvcmljYWwgcHJlZGljdG9yLiBXaGljaCBsZXZlbCBpcyB0aGUgcmVmZXJlbmNlIGxldmVsPyAKSTEgaXMgdGhlIHJlZmVyZW5jZSBsZXZlbC4KCldoaWNoIGxldmVsIHNob3dzIHRoZSBncmVhdGVzdCBkaWZmZXJlbmNlIGluIHByaWNlIGZyb20gdGhlIHJlZmVyZW5jZSBsZXZlbD8gW0hpbnRzIC0gcmVtZW1iZXIgd2UgYXJlIHJlZ3Jlc3NpbmcgdGhlIGxvZyhwcmljZSkgaGVyZSwgYW5kIHRoaW5rIGFib3V0IHdoYXQgdGhlIHByZXNlbmNlIG9mIHRoZSBsb2coY2FyYXQpIHByZWRpY3RvciBpbXBsaWVzLiBXZeKAmXJlIG5vdCBleHBlY3RpbmcgYSBtYXRoZW1hdGljYWwgZXhwbGFuYXRpb25dCgoqKmNsYXJpdHlJRiAgIDEuMTE0NjI1KiogClNob3dzIHRoZSBncmVhdGVzdCBkaWZmZXJlbmNlIGluIHByaWNlIGZyb20gdGhlIHJlZmVyZW5jZSBsZXZlbC4KCmxvZyhjYXJhdCkgcHJlZGljdG9yIC0KCgpFeHRlbnNpb246CkNvdWxkIG5vdCBnZXQgdGhpcyB0byBydW4uCmBgYHtyfQpsaWJyYXJ5KGdnaXJhcGhFeHRyYSkKbW9kZWxfaW50ZXJhY3Rpb24gPC0gbG0obG9nKHByaWNlKSB+IGxvZyhjYXJhdCkgKyBjbGFyaXR5ICsgbG9nKGNhcmF0KTpjbGFyaXR5LCBkYXRhID0gZGlhbW9uZHNfdHJpbSkKCnN1bW1hcnkobW9kZWxfaW50ZXJhY3Rpb24pCgojZ2dQcmVkaWN0KG1vZGVsX2ludGVyYWN0aW9uKQojc3VtbWFyeShtb2RlbF9pbnRlcmFjdGlvbikKYGBgCgo=